library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)
RRPLOTS and flchain
odata <- flchain
odata$chapter <- NULL
pander::pander(table(odata$death))
rownames(odata) <- c(1:nrow(odata))
data <- as.data.frame(model.matrix(Surv(futime,death)~.,odata))
data$`(Intercept)` <- NULL
dataFL <- as.data.frame(cbind(time=odata[rownames(data),"futime"],
status=odata[rownames(data),"death"],
data))
pander::pander(table(dataFL$status))
dataFL$time <- dataFL$time/365
Exploring Raw Features with RRPlot
convar <- colnames(dataFL)[lapply(apply(dataFL,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(dataFL[,c("status",convar)],"status")
pander::pander(topvar)
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]
topFeature <- RRPlot(cbind(dataFL$status,dataFL[,topFive[1]]),
title=topFive[1])


par(op)
## With Survival Analysis
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
RRanalysis[[idx]] <- RRPlot(cbind(dataFL$status,dataFL[,topf]),
timetoEvent=dataFL$time,
atRate=c(0.90,0.80),
title=topf)
idx <- idx + 1
par(op)
}












names(RRanalysis) <- topFive
Reporting the Metrics
pander::pander(t(RRanalysis[[1]]$keyPoints),caption="Threshold values")
Threshold values
| Thr |
73.000 |
69.000 |
69.000 |
53.000 |
5.00e+01 |
| RR |
4.013 |
4.399 |
4.414 |
5.679 |
1.00e+00 |
| RR_LCI |
3.740 |
4.045 |
4.059 |
4.450 |
0.00e+00 |
| RR_UCI |
4.305 |
4.783 |
4.799 |
7.248 |
0.00e+00 |
| SEN |
0.581 |
0.713 |
0.713 |
0.968 |
1.00e+00 |
| SPE |
0.883 |
0.790 |
0.792 |
0.210 |
4.38e-04 |
| BACC |
0.732 |
0.752 |
0.752 |
0.589 |
5.00e-01 |
ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL
for (topf in topFive)
{
CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive
pander::pander(ROCAUC)
| age |
0.822 |
0.810 |
0.833 |
| kappa |
0.682 |
0.667 |
0.696 |
| lambda |
0.665 |
0.650 |
0.680 |
| creatinine |
0.588 |
0.572 |
0.604 |
pander::pander(CstatCI)
| age |
0.774 |
0.775 |
0.765 |
0.785 |
| kappa |
0.671 |
0.671 |
0.658 |
0.684 |
| lambda |
0.657 |
0.657 |
0.645 |
0.669 |
| creatinine |
0.584 |
0.584 |
0.570 |
0.598 |
pander::pander(LogRangp)
| age |
0.00e+00 |
| kappa |
4.90e-175 |
| lambda |
4.41e-145 |
| creatinine |
2.67e-67 |
pander::pander(Sensitivity)
| age |
0.581 |
0.558 |
0.602 |
| kappa |
0.319 |
0.298 |
0.340 |
| lambda |
0.300 |
0.279 |
0.321 |
| creatinine |
0.288 |
0.269 |
0.309 |
pander::pander(Specificity)
| age |
0.883 |
0.873 |
0.892 |
| kappa |
0.900 |
0.891 |
0.908 |
| lambda |
0.899 |
0.890 |
0.907 |
| creatinine |
0.865 |
0.854 |
0.875 |
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe")
pander::pander(meanMatrix)
| age |
0.822 |
0.774 |
0.581 |
0.883 |
| kappa |
0.682 |
0.671 |
0.319 |
0.900 |
| lambda |
0.665 |
0.657 |
0.300 |
0.899 |
| creatinine |
0.588 |
0.584 |
0.288 |
0.865 |
Train Test Set
trainsamples <- sample(nrow(dataFL),0.5*nrow(dataFL))
dataFLTrain <- dataFL[trainsamples,]
dataFLTest <- dataFL[-trainsamples,]
pander::pander(table(dataFLTrain$status))
pander::pander(table(dataFLTest$status))
Cox Modeling
ml <- BSWiMS.model(Surv(time,status)~.,data=dataFLTrain,loops=0)
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
| age |
0.1021 |
0.0736 |
0.0800 |
0.0871 |
0.712 |
0.610 |
| sexM |
0.3717 |
0.1800 |
0.3034 |
0.4458 |
0.531 |
0.712 |
| flc.grp |
0.0757 |
0.0216 |
0.0578 |
0.0895 |
0.610 |
0.712 |
| kappa |
0.1495 |
0.0922 |
0.1707 |
0.2978 |
0.666 |
0.706 |
Table continues below
| age |
0.707 |
0.735 |
0.632 |
0.736 |
0.19568 |
0.8480 |
| sexM |
0.707 |
0.511 |
0.740 |
0.736 |
0.00729 |
0.0429 |
| flc.grp |
0.707 |
0.632 |
0.739 |
0.736 |
0.00529 |
0.3065 |
| kappa |
0.707 |
0.639 |
0.736 |
0.736 |
0.00208 |
-0.0984 |
| age |
26.65 |
25.05 |
-1.04e-01 |
1 |
| sexM |
5.51 |
1.12 |
4.51e-03 |
1 |
| flc.grp |
3.92 |
8.13 |
3.27e-03 |
1 |
| kappa |
2.66 |
-2.57 |
-2.55e-05 |
1 |
Cox Model Performance
The evaluation of the raw Cox model with RRPlot()
timeinterval <- 5
h0 <- sum(dataFLTrain$status & dataFLTrain$time <= timeinterval)
h0 <- h0/sum((dataFLTrain$time > timeinterval) | (dataFLTrain$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
| 0.13 |
5 |
index <- predict(ml,dataFLTrain)
rdata <- cbind(dataFLTrain$status,ppoisGzero(index,h0))
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
timetoEvent=dataFLTrain$time,
title="Train: Breast Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)






Time to event
toinclude <- rdata[,1]==1
obstiemToEvent <- dataFLTrain[,"time"]
summary(obstiemToEvent)
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000 8.146 11.789 10.043
13.060 14.079
tmin<-min(obstiemToEvent)
if (tmin < 0.01) tmin <- 0.01
tmax<-max(obstiemToEvent)
sum(toinclude)
[1] 948
timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
timetoEvent[is.infinite(timetoEvent)] <- 3*tmax
timetoEvent[timetoEvent > 3*tmax] <- 3*tmax
timetoEvent[timetoEvent < tmin] <- tmin
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
| timetoEvent[toinclude] |
0.256 |
0.0104 |
24.5 |
3.32e-103 |
Fitting linear model: obstiemToEvent[toinclude] ~ 0 +
timetoEvent[toinclude]
| 948 |
5.44 |
0.388 |
0.388 |
plot(timetoEvent,obstiemToEvent,
col=1+rdata[,1],
xlab="Expected time",
ylab="Observed time",
main="Expected vs. Observed",
xlim=c(tmin,tmax),
ylim=c(tmin,tmax),
log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
pch=c(1,1,-1),
col=c(1,2,"blue"),
lty=c(-1,-1,1)
)

MADerror2 <- mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude]))
pander::pander(MADerror2)
8.51
Cox Calibration
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,dataFLTrain,"status","time")
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
Time to event after calibration
timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
| timetoEvent[toinclude] |
0.0995 |
0.00523 |
19 |
1.06e-68 |
Fitting linear model: obstiemToEvent[toinclude] ~ 0 +
timetoEvent[toinclude]
| 948 |
5.91 |
0.277 |
0.276 |
plot(timetoEvent,obstiemToEvent,
col=1+rdata[,1],
xlab="Expected time",
ylab="Observed time",
main="Expected vs. Observed",
xlim=c(tmin,tmax),
ylim=c(tmin,tmax),
log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
pch=c(1,1,-1),
col=c(1,2,"blue"),
lty=c(-1,-1,1)
)

MADerror2 <- c(MADerror2,mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude])))
pander::pander(MADerror2)
8.51 and 17.09